All scripts and outputs can be found on the SEACAR GitHub repository:
https://github.com/FloridaSEACAR/SEACAR_Panzik
Note: The top 2% of data is excluded when computing mean and standard deviations in plotting sections solely for the purpose of getting y-axis scales. The exclusion of the top 2% is not used in any statistics that are exported.
Loads libraries used in the script. Loads the Segoe UI font for use
in the figures. The inclusion of scipen option limits how
frequently R defaults to scientific notation. Sets default settings for
displaying warning and messages in created document, and sets figure
dpi.
library(knitr)
library(data.table)
library(plyr)
library(dplyr)
library(lubridate)
library(ggplot2)
library(ggpubr)
library(scales)
library(EnvStats)
library(tidyr)
library(stringr)
library(kableExtra)
windowsFonts(`Segoe UI`=windowsFont('Segoe UI'))
options(scipen=999)
opts_chunk$set(warning=FALSE, message=FALSE, dpi=200)
Imports file that is determined in the WC_Discrete_parameter_ReportCompile.R script.
The command fread is used because of its improved speed
while handling large data files. Only columns that are used by the
script are imported from the file, and are designated in the
select input.
The script then gets the name of the parameter as it appears in the
data file, units of the parameter, sets the SampleDate as a
date object, and creates various scales of the date to be used by
plotting functions.
data <- fread(file_in, sep="|", header=TRUE, stringsAsFactors=FALSE,
select=c("ManagedAreaName", "ProgramID", "ProgramName",
"ProgramLocationID", "SampleDate", "Year", "Month",
"RelativeDepth", "ActivityType", "ParameterName",
"ResultValue", "ParameterUnits", "ValueQualifier",
"SEACAR_QAQCFlagCode", "Include"), na.strings="")
parameter <- unique(data$ParameterName)
unit <- unique(data$ParameterUnits)
data$SampleDate <- as.Date(data$SampleDate)
data$YearMonth <- paste0(data$Month, "-", data$Year)
data$YearMonthDec <- data$Year + ((data$Month-0.5) / 12)
data$DecDate <- decimal_date(data$SampleDate)
data[, `:=` (relyear=Year - min(Year), relyear_dd=DecDate - min(DecDate)),
by="ManagedAreaName"]
data <- data[ParameterName==parameter & str_detect(ActivityType, activity) &
RelativeDepth==depth & Include==1, ]
Most data filtering is performed on export from the database, and is
indicated by the Include variable. Include
values of 1 indicate the data should be used for analysis, values of 0
indicate the data should not be used for analysis. Documentation on the
database filtering is provided here: SEACAR
Documentation- Analysis Filters and Calculations.docx
The filtering that is performed by the script at this point removes
rows that are missing values for ResultValue, and only
keeps data that is measured at the relative depth (surface, bottom,
etc.) and activity type (field or sample) of interest. This is partly
handled on export with the RelativeDepth variable, but
there are some measurements that are considered both surface and bottom
based on measurement depth and total depth. By default, these are marked
as Surface for RelativeDepth and receive a
SEACAR_QAQCFlag indicator of 12Q. Data passes
the filtering the process if it is from the correct depth and has an
Include value of 1. The script also only looks at data of
the desired ActivityType which indicates whether it was
measured in the field (Field) or in the lab
(Sample).
After the initial filtering, a second filter variable is created to determine whether enough time is represented in the managed area, which is that each managed area has 10 year or more of unique year entries for observation that pass the initial filter. If data passes the first set of filtering criteria and the time criteria, they are used in the analysis.
After filtering, the amount of data impacted by the H (for dissolved oxygen & pH in program 476), I, Q, S (for Secchi depth), and U value qualifiers. A variable is also created that determines if scatter plot points should be a different color based on value qualifiers of interest.
# param_name <- "Water_Temperature"
# out_dir <- here::here("WQ_Discrete/output/by_parameter/")
# APP_Plots <- TRUE
if(depth=="Bottom"){
data$RelativeDepth[grep("12Q", data$SEACAR_QAQCFlagCode[
data$RelativeDepth=="Surface"])] <- "Bottom"
}
data$Include <- as.logical(data$Include)
data$Include[grep("H", data$ValueQualifier[data$ProgramID==476])] <- TRUE
data <- data[!is.na(data$ResultValue),]
if(param_name!="Secchi_Depth"){
data <- data[!is.na(data$RelativeDepth),]
data <- data[data$RelativeDepth==depth,]
}
if(length(grep("Blank", data$ActivityType))>0){
data <- data[-grep("Blank", data$ActivityType),]
}
if(param_name=="Chlorophyll_a_uncorrected_for_pheophytin" |
param_name=="Salinity" | param_name=="Turbidity"){
data <- data[grep(activity, data$ActivityType[!is.na(data$ActivityType)]),]
}
if(param_name=="Water_Temperature"){
data <- data[data$ResultValue>=-2,]
} else{
data <- data[data$ResultValue>=0,]
}
data <- merge.data.frame(MA_All[,c("AreaID", "ManagedAreaName")],
data, by="ManagedAreaName", all=TRUE)
MA_Summ <- data %>%
group_by(AreaID, ManagedAreaName) %>%
dplyr::summarize(ParameterName=parameter,
RelativeDepth=depth,
ActivityType=activity,
N_Data=length(ResultValue[Include==TRUE & !is.na(ResultValue)]),
N_Years=length(unique(Year[Include==TRUE & !is.na(Year)])),
EarliestYear=min(Year[Include==TRUE]),
LatestYear=max(Year[Include==TRUE]),
SufficientData=ifelse(N_Data>0 & N_Years>=10, TRUE, FALSE))
data <- merge.data.frame(data, MA_Summ[,c("ManagedAreaName", "SufficientData")],
by="ManagedAreaName")
data$Use_In_Analysis <- ifelse(data$Include==TRUE & data$SufficientData==TRUE,
TRUE, FALSE)
MA_Summ <- MA_Summ %>%
select(AreaID, ManagedAreaName, ParameterName, RelativeDepth, ActivityType,
SufficientData, everything())
MA_Summ <- as.data.frame(MA_Summ[order(MA_Summ$ManagedAreaName), ])
total <- length(data$Include)
pass_filter <- length(data$Include[data$Include==TRUE])
count_H <- length(grep("H", data$ValueQualifier[data$ProgramID==476]))
perc_H <- 100*count_H/length(data$ValueQualifier)
count_I <- length(grep("I", data$ValueQualifier))
perc_I <- 100*count_I/length(data$ValueQualifier)
count_Q <- length(grep("Q", data$ValueQualifier))
perc_Q <- 100*count_Q/length(data$ValueQualifier)
count_S <- length(grep("S", data$ValueQualifier))
perc_S <- 100*count_S/length(data$ValueQualifier)
count_U <- length(grep("U", data$ValueQualifier))
perc_U <- 100*count_U/length(data$ValueQualifier)
data$VQ_Plot <- data$ValueQualifier
inc_H <- ifelse(param_name=="pH" | param_name=="Dissolved_Oxygen" |
param_name=="Dissolved_Oxygen_Saturation", TRUE, FALSE)
if (inc_H==TRUE){
data$VQ_Plot <- gsub("[^HU]+", "", data$VQ_Plot)
data$VQ_Plot <- gsub("UH", "HU", data$VQ_Plot)
data$VQ_Plot[na.omit(data$ProgramID!=476)] <- gsub("[^U]+", "",
data$VQ_Plot[
na.omit(data$ProgramID!=476)])
data$VQ_Plot[data$VQ_Plot==""] <- NA
cat(paste0("Number of Measurements: ", total,
", Number Passed Filter: ", pass_filter, "\n",
"Program 476 H Codes: ", count_H, " (", round(perc_H, 6), "%)\n",
"I Codes: ", count_I, " (", round(perc_I, 6), "%)\n",
"Q Codes: ", count_Q, " (", round(perc_Q, 6), "%)\n",
"U Codes: ", count_U, " (", round(perc_U, 6), "%)"))
} else if (param_name=="Secchi_Depth") {
count_S <- length(grep("S", data$ValueQualifier))
perc_S <- 100*count_S/length(data$ValueQualifier)
data$VQ_Plot <- gsub("[^SU]+", "", data$VQ_Plot)
data$VQ_Plot <- gsub("US", "SU", data$VQ_Plot)
data$VQ_Plot[data$VQ_Plot==""] <- NA
cat(paste0("Number of Measurements: ", total,
", Number Passed Filter: ", pass_filter, "\n",
"I Codes: ", count_I, " (", round(perc_I, 6), "%)\n",
"Q Codes: ", count_Q, " (", round(perc_Q, 6), "%)\n",
"S Codes: ", count_S, " (", round(perc_S, 6), "%)\n",
"U Codes: ", count_U, " (", round(perc_U, 6), "%)"))
} else{
data$VQ_Plot <- gsub("[^U]+", "", data$VQ_Plot)
data$VQ_Plot[data$VQ_Plot==""] <- NA
cat(paste0("Number of Measurements: ", total,
", Number Passed Filter: ", pass_filter, "\n",
"I Codes: ", count_I, " (", round(perc_I, 6), "%)\n",
"Q Codes: ", count_Q, " (", round(perc_Q, 6), "%)\n",
"U Codes: ", count_U, " (", round(perc_U, 6), "%)"))
}
## Number of Measurements: 95204, Number Passed Filter: 95204
## I Codes: 11403 (11.977438%)
## Q Codes: 1225 (1.286711%)
## U Codes: 2876 (3.020881%)
data_summ <- data %>%
group_by(AreaID, ManagedAreaName) %>%
dplyr::summarize(ParameterName=parameter,
RelativeDepth=depth,
ActivityType=activity,
N_Total=length(ResultValue),
N_AnalysisUse=length(ResultValue[SufficientData==TRUE]),
N_H=length(grep("H", data$ValueQualifier[data$ProgramID==476])),
perc_H=100*N_H/length(data$ValueQualifier),
N_I=length(grep("I", data$ValueQualifier)),
perc_I=100*N_I/length(data$ValueQualifier),
N_Q=length(grep("Q", data$ValueQualifier)),
perc_Q=100*N_Q/length(data$ValueQualifier),
N_S=length(grep("S", data$ValueQualifier)),
perc_S=100*N_S/length(data$ValueQualifier),
N_U=length(grep("U", data$ValueQualifier)),
perc_U=100*N_U/length(data$ValueQualifier))
data_summ <- as.data.table(data_summ[order(data_summ$ManagedAreaName), ])
fwrite(data_summ, paste0(out_dir,"/", param_name, "_", activity, "_", depth,
"_DataSummary.csv"), sep=",")
rm(data_summ)
MA_Include <- MA_Summ$ManagedAreaName[MA_Summ$SufficientData==TRUE &
MA_Summ$N_Data<2000000]
n <- length(MA_Include)
MA_Exclude <- MA_Summ[MA_Summ$N_Years<10 & MA_Summ$N_Years>0,]
MA_Exclude <- MA_Exclude[,c("ManagedAreaName", "N_Years")]
z <- nrow(MA_Exclude)
setDT(data)
Gets summary statistics for each managed area. Excluded managed areas are not included into whether the data should be used or not. Uses piping from dplyr package to feed into subsequent steps. The following steps are performed:
data variable and only include rows that have
a SufficientData value of TRUEManagedAreaName,
Year, and Month.
Month grouping
and are only for ManagedAreaName and
Year.Year grouping and
are only for ManagedAreaName and MonthManagedAreaName then Year then
MonthMA_YM_Stats <- data[data$Use_In_Analysis==TRUE, ] %>%
group_by(AreaID, ManagedAreaName, Year, Month) %>%
dplyr::summarize(ParameterName=parameter,
RelativeDepth=depth,
ActivityType=activity,
N_Data=length(ResultValue),
Min=min(ResultValue),
Max=max(ResultValue),
Median=median(ResultValue),
Mean=mean(ResultValue),
StandardDeviation=sd(ResultValue),
ProgramIDs=paste(sort(unique(ProgramID), decreasing=FALSE),
collapse=', '))
MA_YM_Stats <- as.data.table(MA_YM_Stats[order(MA_YM_Stats$ManagedAreaName,
MA_YM_Stats$Year,
MA_YM_Stats$Month), ])
fwrite(MA_YM_Stats, paste0(out_dir,"/", param_name, "_", activity, "_", depth,
"_ManagedArea_YearMonth_Stats.txt"), sep="|")
rm(MA_YM_Stats)
MA_Y_Stats <- data[data$Use_In_Analysis==TRUE, ] %>%
group_by(AreaID, ManagedAreaName, Year) %>%
dplyr::summarize(ParameterName=parameter,
RelativeDepth=depth,
ActivityType=activity,
N=length(ResultValue),
Min=min(ResultValue),
Max=max(ResultValue),
Median=median(ResultValue),
Mean=mean(ResultValue),
StandardDeviation=sd(ResultValue),
ProgramIDs=paste(sort(unique(ProgramID), decreasing=FALSE),
collapse=', '))
MA_Y_Stats <- as.data.table(MA_Y_Stats[order(MA_Y_Stats$ManagedAreaName,
MA_Y_Stats$Year), ])
fwrite(MA_Y_Stats, paste0(out_dir,"/", param_name, "_", activity, "_", depth,
"_ManagedArea_Year_Stats.txt"), sep="|")
rm(MA_Y_Stats)
MA_M_Stats <- data[data$Use_In_Analysis==TRUE, ] %>%
group_by(AreaID, ManagedAreaName, Month) %>%
dplyr::summarize(ParameterName=parameter,
RelativeDepth=depth,
ActivityType=activity,
N=length(ResultValue),
Min=min(ResultValue),
Max=max(ResultValue),
Median=median(ResultValue),
Mean=mean(ResultValue),
StandardDeviation=sd(ResultValue),
ProgramIDs=paste(sort(unique(ProgramID), decreasing=FALSE),
collapse=', '))
MA_M_Stats <- as.data.table(MA_M_Stats[order(MA_M_Stats$ManagedAreaName,
MA_M_Stats$Month), ])
fwrite(MA_M_Stats, paste0(out_dir,"/", param_name, "_", activity, "_", depth,
"_ManagedArea_Month_Stats.txt"), sep="|")
#rm(MA_M_Stats)
Gets monitoring location statistics, which is defined as a unique
combination of ManagedAreaName, ProgramID,
ProgramAreaName, and ProgramLocationID, using
piping from dplyr package. The following steps are performed:
data variable and only include rows that have
a SufficientData value of TRUEManagedAreaName,
ProgramID, ProgramName, and
ProgramLocationID.ManagedAreaName then ProgramName then
ProgramID then ProgramLocationIDMon_Stats <- data[data$Use_In_Analysis==TRUE, ] %>%
group_by(AreaID, ManagedAreaName, ProgramID, ProgramName,
ProgramLocationID) %>%
dplyr::summarize(ParameterName=parameter,
RelativeDepth=depth,
ActivityType=activity,
EarliestSampleDate=min(SampleDate),
LastSampleDate=max(SampleDate),
N=length(ResultValue),
Min=min(ResultValue),
Max=max(ResultValue),
Median=median(ResultValue),
Mean=mean(ResultValue),
StandardDeviation=sd(ResultValue))
Mon_Stats <- as.data.table(Mon_Stats[order(Mon_Stats$ManagedAreaName,
Mon_Stats$ProgramName,
Mon_Stats$ProgramID,
Mon_Stats$ProgramLocationID), ])
fwrite(Mon_Stats, paste0(out_dir,"/", param_name, "_", activity, "_", depth,
"_MonitoringLoc_Stats.txt"), sep="|")
rm(Mon_Stats)
Gets seasonal Kendall Tau statistics using the
kendallSeasonalTrendTest from the EnvStats
package. The Trend parameter is determined from a
user-defined function based on the median, Senn slope, and p values from
the data. Analysis modified from code created by Jason Scolaro that
performed at The Water Atlas: https://sarasota.wateratlas.usf.edu/water-quality-trends/#analysis-overview
The following steps are performed:
data variable and only include rows that have
a SufficientData value of TRUEManagedAreaName.kendallSeasonalTrendTest
function using the Year values for year, and
Month as the seasonal qualifier, and Trend.TRUE indicates that the
data should be treated as not being serially auto-correlated. An
independent.obs value of FALSE indicates that it is treated
as being serially auto-correlated, but also requires one observation per
season per year for the full time of observation.tauSeasonal <- function(data, independent, stats.median, stats.minYear,
stats.maxYear, seasondata=MA_M_Stats[
MA_M_Stats$ManagedAreaName==MA_Include[i]]) {
setDT(data)
tau <- NULL
tryCatch({ken <- kendallSeasonalTrendTest(
y=data$ResultValue,
season=data$Month,
year=data$relyear,
independent.obs=independent)
tau <- ken$estimate[1]
z <- ken$statistic[2]
p_z <- ken$p.value[2]
chi_sq <- ken$statistic[1]
p_chi_sq <- ken$p.value[1]
slope <- ken$estimate[2]
intercept <- ken$estimate[3]
trend <- trend_calculator(slope, stats.median, p_z)
seasonresults <- as.data.table(ken$seasonal.estimates)
rm(ken)
}, warning=function(w) {
print(w)
}, error=function(e) {
print(e)
}, finally={
if (!exists("tau")) {
tau <- NA
}
if (!exists("z")) {
z <- NA
}
if (!exists("p_z")) {
p_z <- NA
}
if (!exists("chi_sq")) {
chi_sq <- NA
}
if (!exists("p_chi_sq")) {
p_chi_sq <- NA
}
if (!exists("slope")) {
slope <- NA
}
if (!exists("intercept")) {
intercept <- NA
}
if (!exists("trend")) {
trend <- NA
}
})
KT <-data.table(AreaID=unique(data$AreaID),
ManagedAreaName=unique(data$ManagedAreaName),
season="All",
stats.median=stats.median,
independent=independent,
tau=tau,
z=z,
p_z=p_z,
chi_sq=chi_sq,
p_chi_sq=p_chi_sq,
slope=slope,
intercept=intercept,
trend=trend)
seasonresults[, `:=` (AreaID=unique(data$AreaID),
ManagedAreaName=unique(data$ManagedAreaName),
season=unique(data$Month),
stats.median=as.numeric(NA),
independent=independent,
z=as.numeric(NA),
p_z=as.numeric(NA),
chi_sq=as.numeric(NA),
p_chi_sq=as.numeric(NA),
trend=as.integer(NA))]
for(s in as.integer(unique(seasonresults$season))){
seasondat_s <- data[Month==s, ]
if(nrow(seasondat_s) < 3 | length(unique(seasondat_s$Year)) < 3 |
is.na(seasonresults[season==s, tau])){
next
} else{
if(!is.na(unique(seasondat_s$Month))){
trend_s <- trend_calculator(seasonresults[season==s, slope],
seasondata[Month==s, Median], p_z)
ken_s <- kendallTrendTest(ResultValue ~ relyear, data=seasondat_s)
seasonresults[season==s, `:=` (stats.median=unique(seasondata[
Month==s, Median]),
z=ken_s$statistic,
p_z=ken_s$p.value,
chi_sq=NA,
p_chi_sq=NA,
trend=trend_s)]
} else{
next
}
}
}
seasonresults[, season := as.character(season)]
KT <- rbind(KT, seasonresults)
KT[, season := factor(season, levels=c("All", seq(1:12)), ordered=TRUE)]
return(KT)
}
runStats <- function(data, MA_M_Stats) {
data$Index <- as.Date(data$SampleDate) # , "%Y-%m-%d")
data$ResultValue <- as.numeric(data$ResultValue)
# Calculate basic stats
stats.median <- median(data$ResultValue, na.rm=TRUE)
stats.minYear <- min(data$relyear, na.rm=TRUE)
stats.maxYear <- max(data$relyear, na.rm=TRUE)
# Calculate Kendall Tau and Slope stats,
# then update appropriate columns and table
seasondata <- MA_M_Stats[MA_M_Stats$ManagedAreaName==MA_Include[i]]
KT <- tauSeasonal(data, TRUE, stats.median,
stats.minYear, stats.maxYear, seasondata)
# if (is.null(KT[9])) {
if (is.na(KT[season=="All", trend])) {
KT <- tauSeasonal(data, FALSE, stats.median,
stats.minYear, stats.maxYear, seasondata)
}
if (is.null(KT.Stats)==TRUE) {
KT.Stats <- KT
} else{
KT.Stats <- rbind(KT.Stats, KT)
}
return(KT.Stats)
}
trend_calculator <- function(slope, median_value, p) {
trend <-
if (p < .05 & abs(slope) > abs(median_value) / 10.) {
if (slope > 0) {
2
}
else {
-2
}
}
else if (p < .05 & abs(slope) < abs(median_value) / 10.) {
if (slope > 0) {
1
}
else {
-1
}
}
else
0
return(trend)
}
KT.Stats <- NULL
# Loop that goes through each managed area.
# List of managed areas stored in MA_Years$ManagedAreaName
c_names <- c("AreaID", "ManagedAreaName", "Season", "Median", "Independent",
"tau", "z", "p_z", "chi_sq", "p_chi_sq", "SennSlope",
"SennIntercept", "Trend")
if(n==0){
KT.Stats <- data.frame(matrix(ncol=length(c_names),
nrow=length(MA_Summ$ManagedAreaName)))
colnames(KT.Stats) <- c_names
# KT.Stats[, c("AreaID", "ManagedAreaName")] <-
# MA_Summ[, c("AreaID", "ManagedAreaName")]
} else{
for (i in 1:n) {
x <- nrow(data[data$Use_In_Analysis==TRUE &
data$ManagedAreaName==MA_Include[i], ])
if (x>0) {
KT.Stats <- runStats(data[data$Use_In_Analysis==TRUE &
data$ManagedAreaName ==
MA_Include[i], ], MA_M_Stats)
}
}
KT.Stats <- as.data.frame(KT.Stats)
if(dim(KT.Stats)[2]==1){
KT.Stats <- as.data.frame(t(KT.Stats))
}
colnames(KT.Stats) <- c_names
rownames(KT.Stats) <- seq(1:nrow(KT.Stats))
KT.Stats$tau <- round(as.numeric(KT.Stats$tau), digits=4)
KT.Stats$z <- round(as.numeric(KT.Stats$z), digits=4)
KT.Stats$p_z <- round(as.numeric(KT.Stats$p_z), digits=4)
KT.Stats$chi_sq <- round(as.numeric(KT.Stats$chi_sq), digits=4)
KT.Stats$p_chi_sq <- round(as.numeric(KT.Stats$p_chi_sq), digits=4)
KT.Stats$SennSlope <- as.numeric(KT.Stats$SennSlope)
KT.Stats$SennIntercept <- as.numeric(KT.Stats$SennIntercept)
KT.Stats$Trend <- as.integer(KT.Stats$Trend)
}
KT.Stats <- merge.data.frame(MA_Summ, KT.Stats,
by=c("AreaID", "ManagedAreaName"), all=TRUE)
KT.Stats <- as.data.table(KT.Stats[order(KT.Stats$ManagedAreaName,
KT.Stats$Season), ])
KT.Stats2 <- copy(KT.Stats)
KT.Stats[, `:=` (RelativeDepth=depth, Units=unit)]
KT.Stats_all <- rbind(KT.Stats_all, KT.Stats)
KT.Stats2$MonitoringID <- NULL
fwrite(KT.Stats2, paste0(out_dir,"/", param_name, "_", activity, "_", depth,
"_KendallTau_Stats.txt"), sep="|")
rm(KT.Stats2)
data <- data[!is.na(data$ResultValue),]
plot_theme <- theme_bw() +
theme(text=element_text(family="Segoe UI"),
title=element_text(face="bold"),
plot.title=element_text(hjust=0.5, size=14, color="#314963"),
plot.subtitle=element_text(hjust=0.5, size=10, color="#314963"),
axis.title.x=element_text(margin=margin(t=5, r=0,
b=10, l=0)),
axis.title.y=element_text(margin=margin(t=0, r=10,
b=0, l=0)),
axis.text=element_text(size=10),
axis.text.x=element_text(face="bold", angle=60, hjust=1),
axis.text.y=element_text(face="bold"))
This part will create a scatter plot of the all data that passed
initial filtering criteria with points colored based on specific value
qualifiers. The values determined at the beginning
(year_lower, year_upper, min_RV,
mn_RV, x_scale, and y_scale) are
solely for use by the plotting functions and are not output as part of
the computed statistics.
plot_theme <- theme_bw() +
theme(text=element_text(family="Segoe UI"),
title=element_text(face="bold"),
plot.title=element_text(hjust=0.5, size=14, color="#314963"),
plot.subtitle=element_text(hjust=0.5, size=10, color="#314963"),
axis.title.x=element_text(margin=margin(t=5, r=0,
b=10, l=0)),
axis.title.y=element_text(margin=margin(t=0, r=10,
b=0, l=0)),
axis.text=element_text(size=10),
axis.text.x=element_text(face="bold", angle=60, hjust=1),
axis.text.y=element_text(face="bold"))
year_lower <- min(data$Year)
year_upper <- max(data$Year)
min_RV <- min(data$ResultValue)
mn_RV <- mean(data$ResultValue[data$ResultValue <
quantile(data$ResultValue, 0.98)])
sd_RV <- sd(data$ResultValue[data$ResultValue <
quantile(data$ResultValue, 0.98)])
x_scale <- ifelse(year_upper - year_lower > 30, 10, 5)
y_scale <- mn_RV + 4 * sd_RV
p1 <- ggplot(data=data[data$Include==TRUE,],
aes(x=SampleDate, y=ResultValue, fill=VQ_Plot)) +
geom_point(shape=21, size=3, color="#333333", alpha=0.75) +
labs(subtitle="Autoscale",
x="Year", y=paste0("Values (", unit, ")"),
fill="Value Qualifier") +
plot_theme +
theme(legend.position="top", legend.box="horizontal",
legend.justification="right") +
scale_x_date(labels=date_format("%Y")) +
{if(inc_H==TRUE){
scale_fill_manual(values=c("H"= "#F8766D", "U"= "#00BFC4",
"HU"="#7CAE00"), na.value="#cccccc")
} else if(param_name=="Secchi_Depth"){
scale_fill_manual(values=c("S"= "#F8766D", "U"= "#00BFC4",
"SU"="#7CAE00"), na.value="#cccccc")
} else {
scale_fill_manual(values=c("U"= "#00BFC4"), na.value="#cccccc")
}}
p2 <- ggplot(data=data[data$Include==TRUE,],
aes(x=SampleDate, y=ResultValue, fill=VQ_Plot)) +
geom_point(shape=21, size=3, color="#333333", alpha=0.75) +
ylim(min_RV, y_scale) +
labs(subtitle="Scaled to 4x Standard Deviation",
x="Year", y=paste0("Values (", unit, ")")) +
plot_theme +
theme(legend.position="none") +
scale_x_date(labels=date_format("%Y")) +
{if(inc_H==TRUE){
scale_fill_manual(values=c("H"= "#F8766D", "U"= "#00BFC4",
"HU"="#7CAE00"), na.value="#cccccc")
} else if(param_name=="Secchi_Depth"){
scale_fill_manual(values=c("S"= "#F8766D", "U"= "#00BFC4",
"SU"="#7CAE00"), na.value="#cccccc")
} else {
scale_fill_manual(values=c("U"= "#00BFC4"), na.value="#cccccc")
}}
leg <- get_legend(p1)
pset <- ggarrange(leg, p1 + theme(legend.position="none"), p2,
ncol=1, heights=c(0.1, 1, 1))
p0 <- ggplot() + labs(title="Scatter Plot for Entire Dataset") +
plot_theme + theme(panel.border=element_blank(),
panel.grid.major=element_blank(),
panel.grid.minor=element_blank(),
axis.line=element_blank())
ggarrange(p0, pset, ncol=1, heights=c(0.1, 1))
Box plots are created by using the entire data set and excludes any data that has been previously filtered out. The scripts that create plots follow this format
SufficientData of
TRUEThis set of box plots are grouped by year.
min_RV <- min(data$ResultValue[data$Include==TRUE])
mn_RV <- mean(data$ResultValue[data$Include==TRUE &
data$ResultValue <
quantile(data$ResultValue, 0.98)])
sd_RV <- sd(data$ResultValue[data$Include==TRUE &
data$ResultValue <
quantile(data$ResultValue, 0.98)])
y_scale <- mn_RV + 4 * sd_RV
p1 <- ggplot(data=data[data$Include==TRUE, ],
aes(x=Year, y=ResultValue, group=Year)) +
geom_boxplot(color="#333333", fill="#cccccc", outlier.shape=21,
outlier.size=3, outlier.color="#333333",
outlier.fill="#cccccc", outlier.alpha=0.75) +
labs(subtitle="Autoscale", x="Year",
y=paste0("Values (", unit, ")")) +
plot_theme
p2 <- ggplot(data=data[data$Include==TRUE, ],
aes(x=Year, y=ResultValue, group=Year)) +
geom_boxplot(color="#333333", fill="#cccccc", outlier.shape=21,
outlier.size=3, outlier.color="#333333",
outlier.fill="#cccccc", outlier.alpha=0.75) +
labs(subtitle="Scaled to 4x Standard Deviation", x="Year",
y=paste0("Values (", unit, ")")) +
ylim(min_RV, y_scale) +
plot_theme
p3 <- ggplot(data=data[data$Include==TRUE, ],
aes(x=as.integer(Year), y=ResultValue, group=Year)) +
geom_boxplot(color="#333333", fill="#cccccc", outlier.shape=21,
outlier.size=3, outlier.color="#333333",
outlier.fill="#cccccc", outlier.alpha=0.75) +
labs(subtitle="Scaled to 4x Standard Deviation, Last 10 Years",
x="Year", y=paste0("Values (", unit, ")")) +
ylim(min_RV, y_scale) +
scale_x_continuous(limits=c(max(data$Year) - 10.5, max(data$Year)+1),
breaks=seq(max(data$Year) - 10, max(data$Year), 2)) +
plot_theme
set <- ggarrange(p1, p2, p3, ncol=1)
p0 <- ggplot() + labs(title="Summary Box Plots for Entire Data",
subtitle="By Year") + plot_theme +
theme(panel.border=element_blank(), panel.grid.major=element_blank(),
panel.grid.minor=element_blank(), axis.line=element_blank())
Yset <- ggarrange(p0, set, ncol=1, heights=c(0.07, 1))
This set of box plots are grouped by year and month with the color being related to the month.
p1 <- ggplot(data=data[data$Include==TRUE, ],
aes(x=YearMonthDec, y=ResultValue,
group=YearMonth, color=as.factor(Month))) +
geom_boxplot(fill="#cccccc", outlier.size=1.5, outlier.alpha=0.75) +
labs(subtitle="Autoscale", x="Year",
y=paste0("Values (", unit, ")"), color="Month") +
plot_theme +
theme(legend.position="top", legend.box="horizontal") +
guides(color=guide_legend(nrow=1))
p2 <- ggplot(data=data[data$Include==TRUE, ],
aes(x=YearMonthDec, y=ResultValue,
group=YearMonth, color=as.factor(Month))) +
geom_boxplot(fill="#cccccc", outlier.size=1.5, outlier.alpha=0.75) +
labs(subtitle="Scaled to 4x Standard Deviation",
x="Year", y=paste0("Values (", unit, ")")) +
ylim(min_RV, y_scale) +
plot_theme +
theme(legend.position="none", axis.text.x=element_text(face="bold"),
axis.text.y=element_text(face="bold"))
p3 <- ggplot(data=data[data$Include==TRUE, ],
aes(x=YearMonthDec, y=ResultValue,
group=YearMonth, color=as.factor(Month))) +
geom_boxplot(fill="#cccccc", outlier.size=1.5, outlier.alpha=0.75) +
labs(subtitle="Scaled to 4x Standard Deviation, Last 10 Years",
x="Year", y=paste0("Values (", unit, ")")) +
ylim(min_RV, y_scale) +
scale_x_continuous(limits=c(max(data$Year) - 10.5, max(data$Year)+1),
breaks=seq(max(data$Year) - 10, max(data$Year), 2)) +
plot_theme +
theme(legend.position="none")
leg <- get_legend(p1)
set <- ggarrange(leg, p1 + theme(legend.position="none"), p2, p3, ncol=1,
heights=c(0.1, 1, 1, 1))
p0 <- ggplot() + labs(title="Summary Box Plots for Entire Data",
subtitle="By Year & Month") + plot_theme +
theme(panel.border=element_blank(), panel.grid.major=element_blank(),
panel.grid.minor=element_blank(), axis.line=element_blank())
YMset <- ggarrange(p0, set, ncol=1, heights=c(0.07, 1))
The following box plots are grouped by month with fill color being related to the month. This is designed to view potential seasonal trends.
p1 <- ggplot(data=data[data$Include==TRUE, ],
aes(x=Month, y=ResultValue,
group=Month, fill=as.factor(Month))) +
geom_boxplot(color="#333333", outlier.shape=21, outlier.size=3,
outlier.color="#333333", outlier.alpha=0.75) +
labs(subtitle="Autoscale", x="Month",
y=paste0("Values (", unit, ")"), fill="Month") +
scale_x_continuous(limits=c(0, 13), breaks=seq(3, 12, 3)) +
plot_theme +
theme(legend.position="top", legend.box="horizontal") +
guides(fill=guide_legend(nrow=1))
p2 <- ggplot(data=data[data$Include==TRUE, ],
aes(x=Month, y=ResultValue,
group=Month, fill=as.factor(Month))) +
geom_boxplot(color="#333333", outlier.shape=21, outlier.size=3,
outlier.color="#333333", outlier.alpha=0.75) +
labs(subtitle="Scaled to 4x Standard Deviation",
x="Month", y=paste0("Values (", unit, ")")) +
ylim(min_RV, y_scale) +
scale_x_continuous(limits=c(0, 13), breaks=seq(3, 12, 3)) +
plot_theme +
theme(legend.position="none")
p3 <- ggplot(data=data[data$Include==TRUE &
data$Year >= max(data$Year) - 10, ],
aes(x=Month, y=ResultValue,
group=Month, fill=as.factor(Month))) +
geom_boxplot(color="#333333", outlier.shape=21, outlier.size=3,
outlier.color="#333333", outlier.alpha=0.75) +
labs(subtitle="Scaled to 4x Standard Deviation, Last 10 Years",
x="Month", y=paste0("Values (", unit, ")")) +
ylim(min_RV, y_scale) +
scale_x_continuous(limits=c(0, 13), breaks=seq(3, 12, 3)) +
plot_theme +
theme(legend.position="none")
leg <- get_legend(p1)
set <- ggarrange(leg, p1 + theme(legend.position="none"), p2, p3, ncol=1,
heights=c(0.1, 1, 1, 1))
p0 <- ggplot() + labs(title="Summary Box Plots for Entire Data",
subtitle="By Month") + plot_theme +
theme(panel.border=element_blank(), panel.grid.major=element_blank(),
panel.grid.minor=element_blank(), axis.line=element_blank())
Mset <- ggarrange(p0, set, ncol=1, heights=c(0.07, 1))
Scatter plots of data values are created for managed areas that have fewer than 10 separate years of data entries. Data points are colored based on specific value qualifiers of interest.
if(z==0){
print("There are no managed areas that qualify.")
} else {
for(i in 1:z){
p1<-ggplot(data=data[data$ManagedAreaName==MA_Exclude$ManagedAreaName[i] &
data$Include==TRUE, ],
aes(x=SampleDate, y=ResultValue, fill=VQ_Plot)) +
geom_point(shape=21, size=3, color="#333333", alpha=0.75) +
labs(title=paste0(MA_Exclude$ManagedAreaName[i], " (",
MA_Exclude$N_Years[i], " Unique Years)"),
subtitle="Autoscale", x="Year",
y=paste0("Values (", unit, ")"), fill="Value Qualifier") +
plot_theme +
theme(legend.position="top", legend.box="horizontal",
legend.justification="right") +
scale_x_date(labels=date_format("%m-%Y")) +
{if(inc_H==TRUE){
scale_fill_manual(values=c("H"= "#F8766D", "U"= "#00BFC4",
"HU"="#7CAE00"), na.value="#cccccc")
} else if(param_name=="Secchi_Depth"){
scale_fill_manual(values=c("S"= "#F8766D", "U"= "#00BFC4",
"SU"="#7CAE00"), na.value="#cccccc")
} else {
scale_fill_manual(values=c("U"= "#00BFC4"), na.value="#cccccc")
}}
print(p1)
}
}
The plots created in this section are designed to show the general
trend of the data. Data is taken and grouped by
ManagedAreaName. The trendlines on the plots are created
using the Senn slope and intercept from the seasonal Kendall Tau
analysis. The scripts that create plots follow this format
SufficientData of
TRUE for the desired managed areaif(n==0){
print("There are no managed areas that qualify.")
} else {
for (i in 1:n) {
plot_data <- data[data$SufficientData==TRUE &
data$ManagedAreaName==MA_Include[i],]
plot_data$Season <- factor(plot_data$Month, levels=c("All", seq(1, 12)),
ordered=TRUE)
year_lower <- min(plot_data$relyear)
year_upper <- max(plot_data$relyear)
min_RV <- min(plot_data$ResultValue)
mn_RV <- mean(plot_data$ResultValue[plot_data$ResultValue <
quantile(data$ResultValue, 0.98)])
sd_RV <- sd(plot_data$ResultValue[plot_data$ResultValue <
quantile(data$ResultValue, 0.98)])
x_scale <- ifelse(year_upper - year_lower > 30, 10, 5)
y_scale <- mn_RV + 4 * sd_RV
tau <- KT.Stats$tau[KT.Stats$ManagedAreaName==MA_Include[i]]
s_slope <- KT.Stats$SennSlope[KT.Stats$ManagedAreaName==MA_Include[i]]
s_int <- KT.Stats$SennIntercept[KT.Stats$ManagedAreaName==MA_Include[i]]
trend <- KT.Stats$Trend[KT.Stats$ManagedAreaName==MA_Include[i]]
z <- KT.Stats$z[KT.Stats$ManagedAreaName==MA_Include[i]]
p_z <- KT.Stats$p_z[KT.Stats$ManagedAreaName==MA_Include[i]]
chi_sq <- KT.Stats$chi_sq[KT.Stats$ManagedAreaName==MA_Include[i]]
p_chi_sq <- KT.Stats$p_chi_sq[KT.Stats$ManagedAreaName==MA_Include[i]]
# model <- lm(ResultValue ~ relyear_dd,
# data=plot_data)
# m_int <- coef(model)[[1]]
# m_slope <- coef(model)[[2]]
# rm(model)
xbrks <- seq(round_any(min(plot_data$relyear_dd), 5, floor), round_any(max(plot_data$relyear_dd), 5, ceiling),
by=(round_any(max(plot_data$relyear_dd), 5, ceiling) - round_any(min(plot_data$relyear_dd), 5, floor))/5)
xlabs <- seq(max(plot_data$Year) - round_any(max(plot_data$relyear_dd), 5,
ceiling),
max(plot_data$Year),
by=(max(plot_data$Year) - (max(plot_data$Year) - round_any(max(plot_data$relyear_dd), 5, ceiling)))/5)
KT.Stats[, season := Season]
KT.Stats[ManagedAreaName==MA_Include[i] & season != "All", `:=`
(N_Data=nrow(plot_data[Season==season &
ManagedAreaName==MA_Include[i], ]),
relyear_dd_lower=min(plot_data[Season==season &
ManagedAreaName==
MA_Include[i],
relyear_dd]),
relyear_dd_upper=max(plot_data[Season==season &
ManagedAreaName==
MA_Include[i],
relyear_dd])), by="season"]
KT.Stats[ManagedAreaName==MA_Include[i] & season=="All", `:=`
(relyear_dd_lower=min(plot_data[ManagedAreaName==MA_Include[i],
relyear_dd]),
relyear_dd_upper=max(plot_data[
ManagedAreaName==MA_Include[i], relyear_dd])), by="season"]
KT.Stats[, season := NULL]
# plot_data[is.na(VQ_Plot), VQ_Plot := "None"]
p1 <- ggplot(data=plot_data,
aes(x=relyear_dd, y=ResultValue, fill=VQ_Plot)) +
geom_point(shape=21, size=3, color="#333333", alpha=0.75) +
# geom_abline(aes(slope=s_slope, intercept=s_int),
# color="#000099", size=1.2, alpha=0.7) +
geom_segment(data=KT.Stats[ManagedAreaName==MA_Include[i] &
Season=="All", ],
aes(x=relyear_dd_lower, y=relyear_dd_lower * SennSlope +
SennIntercept,
xend=relyear_dd_upper,
yend=relyear_dd_upper * SennSlope + SennIntercept),
color="#000099", size=1.2, alpha=0.7, inherit.aes=FALSE) +
labs(subtitle="Autoscale",
x="Year", y=paste0("Values (", unit, ")"),
fill="Value Qualifier") +
plot_theme +
theme(legend.position="top", legend.box="horizontal",
legend.justification="right") +
{if(inc_H==TRUE){
scale_fill_manual(values=c("H"= "#F8766D", "U"= "#00BFC4",
"HU"="#7CAE00"), na.value="#cccccc")
} else if(param_name=="Secchi_Depth"){
scale_fill_manual(values=c("S"= "#F8766D", "U"= "#00BFC4",
"SU"="#7CAE00"), na.value="#cccccc")
} else {
scale_fill_manual(values=c("U"= "#00BFC4"), na.value="#cccccc")
}} +
scale_x_continuous(breaks=xbrks,
labels=xlabs)
p2 <- ggplot(data=plot_data,
aes(x=relyear_dd, y=ResultValue, fill=VQ_Plot)) +
geom_point(shape=21, size=3, color="#333333", alpha=0.75) +
# geom_abline(aes(slope=s_slope, intercept=s_int),
# color="#000099", size=1.2, alpha=0.7) +
geom_segment(data=KT.Stats[ManagedAreaName==MA_Include[i] &
Season=="All", ],
aes(x=relyear_dd_lower, y=relyear_dd_lower * SennSlope +
SennIntercept, xend=relyear_dd_upper,
yend=relyear_dd_upper * SennSlope + SennIntercept),
color="#000099", size=1.2, alpha=0.7, inherit.aes=FALSE) +
ylim(min_RV, y_scale) +
labs(subtitle="Scaled to 4x Standard Deviation",
x="Year", y=paste0("Values (", unit, ")")) +
plot_theme +
theme(legend.position="none") +
{if(inc_H==TRUE){
scale_fill_manual(values=c("H"= "#F8766D", "U"= "#00BFC4",
"HU"="#7CAE00"), na.value="#cccccc")
} else if(param_name=="Secchi_Depth"){
scale_fill_manual(values=c("S"= "#F8766D", "U"= "#00BFC4",
"SU"="#7CAE00"), na.value="#cccccc")
} else {
scale_fill_manual(values=c("U"= "#00BFC4"), na.value="#cccccc")
}} +
scale_x_continuous(breaks=xbrks,
labels=xlabs)
splot <- ggplot(plot_data, aes(x=relyear_dd, y=ResultValue)) +
geom_point(shape=21, size=1.5, color="#333333", fill="#cccccc",
alpha=0.75) +
geom_segment(data=KT.Stats[ManagedAreaName==MA_Include[i] &
Season != "All", ],
aes(x=relyear_dd_lower, y=relyear_dd_lower * SennSlope +
SennIntercept, xend=relyear_dd_upper,
yend=relyear_dd_upper * SennSlope + SennIntercept),
color="#000099", size=1.2, alpha=0.7) +
#ylim(min_RV-0.1*y_scale, y_scale) +
scale_x_continuous(breaks=xbrks,
labels=xlabs) +
labs(y=paste0("Values (", unit, ")"), x="Year",
subtitle="Results for Individual Seasons") +
facet_wrap(~Season, ncol=3) +
plot_theme
leg <- get_legend(p1)
KTset <- ggarrange(leg, p1 + theme(legend.position="none"), p2,
splot, ncol=1, heights=c(0.1, 1, 1, 1.5))
p0 <- ggplot() + labs(title=paste0(MA_Include[i])) +
plot_theme + theme(panel.border=element_blank(),
panel.grid.major=element_blank(),
panel.grid.minor=element_blank(),
axis.line=element_blank())
KT.Stats[ManagedAreaName==MA_Include[i], `:=` (N=N_Data,
Median=round(Median, 2),
Slope=round(SennSlope, 4),
Int.=round(SennIntercept, 4),
z=round(z, 1),
chi_sq=round(chi_sq, 1))]
#print(ggarrange(p0, KTset, ncol=1, heights=c(0.1, 1.25)))
ResultTable <- KT.Stats[KT.Stats$ManagedAreaName==MA_Include[i], ] %>%
select(Season, N, Median, tau, Slope, Int., z, p_z, chi_sq, p_chi_sq, Trend)
t1 <- ggtexttable(ResultTable, rows=NULL, theme=ttheme(base_size=11.5)) %>%
tab_add_footnote(text="p_z < 0.00005 appear as 0 due to rounding",
size=10, face="italic")
print(ggarrange(p0, KTset, t1, ncol=1, heights=c(0.015, 0.645, 0.34)))
cat('\n \n \n')
rm(plot_data)
rm(KTset, leg)
}
}
Data is taken and grouped by ManagedAreaName. The
scripts that create plots follow this format
SufficientData of
TRUE for the desired managed areaThe following plots are arranged by ManagedAreaName with
data grouped by Year, then Year and
Month, then finally Month only. Each managed
area will have 3 sets of plots, each with 3 panels in them. Each panel
goes as follows:
if(n==0){
print("There are no managed areas that qualify.")
} else {
for (i in 1:n) {
plot_data <- data[data$SufficientData==TRUE &
data$ManagedAreaName==MA_Include[i],]
year_lower <- min(plot_data$Year)
year_upper <- max(plot_data$Year)
min_RV <- min(plot_data$ResultValue)
mn_RV <- mean(plot_data$ResultValue[plot_data$ResultValue <
quantile(data$ResultValue, 0.98)])
sd_RV <- sd(plot_data$ResultValue[plot_data$ResultValue <
quantile(data$ResultValue, 0.98)])
x_scale <- ifelse(year_upper - year_lower > 30, 10, 5)
y_scale <- mn_RV + 4 * sd_RV
##Year plots
p1 <- ggplot(data=plot_data,
aes(x=Year, y=ResultValue, group=Year)) +
geom_boxplot(color="#333333", fill="#cccccc", outlier.shape=21,
outlier.size=3, outlier.color="#333333",
outlier.fill="#cccccc", outlier.alpha=0.75) +
labs(subtitle="Autoscale",
x="Year", y=paste0("Values (", unit, ")")) +
scale_x_continuous(limits=c(year_lower - 1, year_upper + 1),
breaks=rev(seq(year_upper,
year_lower, -x_scale))) +
plot_theme
p2 <- ggplot(data=plot_data,
aes(x=Year, y=ResultValue, group=Year)) +
geom_boxplot(color="#333333", fill="#cccccc", outlier.shape=21,
outlier.size=3, outlier.color="#333333",
outlier.fill="#cccccc", outlier.alpha=0.75) +
labs(subtitle="Scaled to 4x Standard Deviation",
x="Year", y=paste0("Values (", unit, ")")) +
ylim(min_RV, y_scale) +
scale_x_continuous(limits=c(year_lower - 1, year_upper + 1),
breaks=rev(seq(year_upper,
year_lower, -x_scale))) +
plot_theme
p3 <- ggplot(data=plot_data[plot_data$Year >= year_upper - 10, ],
aes(x=Year, y=ResultValue, group=Year)) +
geom_boxplot(color="#333333", fill="#cccccc", outlier.shape=21,
outlier.size=3, outlier.color="#333333",
outlier.fill="#cccccc", outlier.alpha=0.75) +
labs(subtitle="Scaled to 4x Standard Deviation, Last 10 Years",
x="Year", y=paste0("Values (", unit, ")")) +
ylim(min_RV, y_scale) +
scale_x_continuous(limits=c(year_upper - 10.5, year_upper + 1),
breaks=rev(seq(year_upper, year_upper - 10,-2))) +
plot_theme
Yset <- ggarrange(p1, p2, p3, ncol=1)
p0 <- ggplot() + labs(title=paste0(MA_Include[i]),
subtitle="By Year") +
plot_theme + theme(panel.border=element_blank(),
panel.grid.major=element_blank(),
panel.grid.minor=element_blank(),
axis.line=element_blank())
## Year & Month Plots
p4 <- ggplot(data=plot_data,
aes(x=YearMonthDec, y=ResultValue,
group=YearMonth, color=as.factor(Month))) +
geom_boxplot(fill="#cccccc", outlier.size=1.5, outlier.alpha=0.75) +
labs(subtitle="Autoscale",
x="Year", y=paste0("Values (", unit, ")"), color="Month") +
scale_x_continuous(limits=c(year_lower - 1, year_upper + 1),
breaks=rev(seq(year_upper,
year_lower, -x_scale))) +
plot_theme +
theme(legend.position="none")
p5 <- ggplot(data=plot_data,
aes(x=YearMonthDec, y=ResultValue,
group=YearMonth, color=as.factor(Month))) +
geom_boxplot(fill="#cccccc", outlier.size=1.5, outlier.alpha=0.75) +
labs(subtitle="Scaled to 4x Standard Deviation",
x="Year", y=paste0("Values (", unit, ")"), color="Month") +
ylim(min_RV, y_scale) +
scale_x_continuous(limits=c(year_lower - 1, year_upper + 1),
breaks=rev(seq(year_upper,
year_lower, -x_scale))) +
plot_theme +
theme(legend.position="top", legend.box="horizontal") +
guides(color=guide_legend(nrow=1))
p6 <- ggplot(data=plot_data[plot_data$Year >= year_upper - 10, ],
aes(x=YearMonthDec, y=ResultValue,
group=YearMonth, color=as.factor(Month))) +
geom_boxplot(fill="#cccccc", outlier.size=1.5, outlier.alpha=0.75) +
labs(subtitle="Scaled to 4x Standard Deviation, Last 10 Years",
x="Year", y=paste0("Values (", unit, ")"), color="Month") +
ylim(min_RV, y_scale) +
scale_x_continuous(limits=c(year_upper - 10.5, year_upper + 1),
breaks=rev(seq(year_upper, year_upper - 10,-2))) +
plot_theme +
theme(legend.position="none")
leg1 <- get_legend(p5)
YMset <- ggarrange(leg1, p4, p5 + theme(legend.position="none"), p6,
ncol=1, heights=c(0.1, 1, 1, 1))
p00 <- ggplot() + labs(title=paste0(MA_Include[i]),
subtitle="By Year & Month") + plot_theme +
theme(panel.border=element_blank(),
panel.grid.major=element_blank(),
panel.grid.minor=element_blank(), axis.line=element_blank())
## Month Plots
p7 <- ggplot(data=plot_data,
aes(x=Month, y=ResultValue,
group=Month, fill=as.factor(Month))) +
geom_boxplot(color="#333333", outlier.shape=21, outlier.size=3,
outlier.color="#333333", outlier.alpha=0.75) +
labs(subtitle="Autoscale",
x="Month", y=paste0("Values (", unit, ")"), fill="Month") +
scale_x_continuous(limits=c(0, 13), breaks=seq(3, 12, 3)) +
plot_theme +
theme(legend.position="none")
p8 <- ggplot(data=plot_data,
aes(x=Month, y=ResultValue,
group=Month, fill=as.factor(Month))) +
geom_boxplot(color="#333333", outlier.shape=21, outlier.size=3,
outlier.color="#333333", outlier.alpha=0.75) +
labs(subtitle="Scaled to 4x Standard Deviation",
x="Month", y=paste0("Values (", unit, ")"), fill="Month") +
ylim(min_RV, y_scale) +
scale_x_continuous(limits=c(0, 13), breaks=seq(3, 12, 3)) +
plot_theme +
theme(legend.position="top", legend.box="horizontal") +
guides(fill=guide_legend(nrow=1))
p9 <- ggplot(data=plot_data[plot_data$Year >= year_upper - 10, ],
aes(x=Month, y=ResultValue,
group=Month, fill=as.factor(Month))) +
geom_boxplot(color="#333333", outlier.shape=21, outlier.size=3,
outlier.color="#333333", outlier.alpha=0.75) +
labs(subtitle="Scaled to 4x Standard Deviation, Last 10 Years",
x="Month", y=paste0("Values (", unit, ")"), fill="Month") +
ylim(min_RV, y_scale) +
scale_x_continuous(limits=c(0, 13), breaks=seq(3, 12, 3)) +
plot_theme +
theme(legend.position="none")
leg2 <- get_legend(p8)
Mset <- ggarrange(leg2, p7, p8 + theme(legend.position="none"), p9,
ncol=1, heights=c(0.1, 1, 1, 1))
p000 <- ggplot() + labs(title=paste0(MA_Include[i]),
subtitle="By Month") + plot_theme +
theme(panel.border=element_blank(),
panel.grid.major=element_blank(),
panel.grid.minor=element_blank(), axis.line=element_blank())
print(ggarrange(p0, Yset, ncol=1, heights=c(0.07, 1)))
print(ggarrange(p00, YMset, ncol=1, heights=c(0.07, 1)))
print(ggarrange(p000, Mset, ncol=1, heights=c(0.07, 1, 0.7)))
rm(plot_data)
rm(p1, p2, p3, p4, p5, p6, p7, p8, p9, p0, p00, p000, leg1, leg2,
Yset, YMset, Mset)
}
}
rm(list=setdiff(ls(), c("all_params", "all_depths", "all_activity",
"param_name", "depth", "activity", "KT.Stats_all",
"MA_All", "APP_Plots", "out_dir", "file_in", "file_out")))